Import Libraries

# All coin history data was sourced from [Crypto Download Data](https://www.cryptodatadownload.com/)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(devtools)
## Loading required package: usethis
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(xts)
library(coinmarketcapr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.4     ✓ purrr   0.3.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x xts::first()     masks dplyr::first()
## x dplyr::lag()     masks stats::lag()
## x xts::last()      masks dplyr::last()
library(PerformanceAnalytics)
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend

Evaluating Bitcoin

Clean & Read Historical Data

# Load the BTC/USD history file; the first line of the CSV holds the column names
btc_data <- read.csv(file = "Coinbase_BTCUSD_d.csv", header = TRUE)
# Preview the top six rows to confirm the file parsed as expected
head(btc_data, n = 6L)
##    Timestamp       Date Symbol     Open     High      Low    Close Volume.BTC
## 1 1605830400 2020-11-20 BTCUSD 17821.58 18239.00 17764.76 18142.52    3909.44
## 2 1605744000 2020-11-19 BTCUSD 17782.91 18193.29 17356.00 17821.58   17141.49
## 3 1605657600 2020-11-18 BTCUSD 17679.36 18488.00 17205.02 17782.91   32425.64
## 4 1605571200 2020-11-17 BTCUSD 16726.64 17880.00 16575.42 17679.36   25230.04
## 5 1605484800 2020-11-16 BTCUSD 15966.89 16892.00 15879.00 16726.64   13948.06
## 6 1605398400 2020-11-15 BTCUSD 16082.01 16175.60 15796.09 15966.89    6250.08
##   Volume.USD
## 1   70437003
## 2  306201498
## 3  579119955
## 4  436549314
## 5  230076772
## 6   99871183
# Re-sort the raw rows in ascending Date order
btc <- btc_data[order(btc_data[["Date"]]), ]
head(btc)
##       Timestamp       Date Symbol  Open  High    Low Close Volume.BTC
## 2170 1417392000 2014-12-01 BTCUSD 300.0 370.0 300.00 370.0    0.05656
## 2169 1417478400 2014-12-02 BTCUSD 370.0 378.0 370.00 378.0   15.01000
## 2168 1417564800 2014-12-03 BTCUSD 378.0 378.0 377.01 378.0    0.54660
## 2167 1417651200 2014-12-04 BTCUSD 378.0 378.0 377.10 377.1    0.01000
## 2166 1417737600 2014-12-05 BTCUSD 377.1 377.1 377.10 377.1    0.00000
## 2165 1417824000 2014-12-06 BTCUSD 377.1 378.0 377.10 378.0    0.01500
##      Volume.USD
## 2170      19.53
## 2169    5675.07
## 2168     206.52
## 2167       3.77
## 2166       0.00
## 2165       5.67
# Drop the Timestamp and Symbol columns; every other column is kept
btc <- subset(btc, select = c(-Timestamp, -Symbol))
# Confirm the remaining columns
head(btc)
##            Date  Open  High    Low Close Volume.BTC Volume.USD
## 2170 2014-12-01 300.0 370.0 300.00 370.0    0.05656      19.53
## 2169 2014-12-02 370.0 378.0 370.00 378.0   15.01000    5675.07
## 2168 2014-12-03 378.0 378.0 377.01 378.0    0.54660     206.52
## 2167 2014-12-04 378.0 378.0 377.10 377.1    0.01000       3.77
## 2166 2014-12-05 377.1 377.1 377.10 377.1    0.00000       0.00
## 2165 2014-12-06 377.1 378.0 377.10 378.0    0.01500       5.67
# Parse the "%Y-%m-%d" date values into Date objects
btc[["Date"]] <- as.Date(btc[["Date"]], format = "%Y-%m-%d")
head(btc)
##            Date  Open  High    Low Close Volume.BTC Volume.USD
## 2170 2014-12-01 300.0 370.0 300.00 370.0    0.05656      19.53
## 2169 2014-12-02 370.0 378.0 370.00 378.0   15.01000    5675.07
## 2168 2014-12-03 378.0 378.0 377.01 378.0    0.54660     206.52
## 2167 2014-12-04 378.0 378.0 377.10 377.1    0.01000       3.77
## 2166 2014-12-05 377.1 377.1 377.10 377.1    0.00000       0.00
## 2165 2014-12-06 377.1 378.0 377.10 378.0    0.01500       5.67

No irregularities to correct. The substantial variance between prices is due to Bitcoin’s rapid ascent in price over the duration of the dataset, as shown in the next section.

## Historical Price Chart

# Plotting frame with explicitly named columns, so the traces below can refer
# to them through `ds` instead of reaching back into the global `btc` object.
ds <- data.frame(
  Date = btc$Date,
  Close = btc$Close,
  Volume.USD = btc$Volume.USD
)

# Secondary axis for traded volume: drawn on the right and overlaid on the
# primary price axis ("y").
ay <- list(
  tickfont = list(color = "Orange"),
  overlaying = "y",
  side = "right",
  title = "Trade Volume"
)
# Both traces share the Date x-axis declared here
fig <- plot_ly(ds, x = ~Date)
fig <- fig %>% add_lines(y = ~Close, name = "Price")
# yaxis = "y2" attaches the volume trace to the secondary axis defined in `ay`
fig <- fig %>% add_lines(y = ~Volume.USD, name = "Volume", yaxis = "y2")
fig <- fig %>% layout(
    title = "Bitcoin Price & Trade Volume",
    yaxis2 = ay,
    xaxis = list(
      # Quick-zoom buttons plus a range slider beneath the chart
      rangeselector = list(
        buttons = list(
          list(
            count = 3,
            label = "3 mo",
            step = "month",
            stepmode = "backward"),
          list(
            count = 6,
            label = "6 mo",
            step = "month",
            stepmode = "backward"),
          list(
            count = 1,
            label = "1 yr",
            step = "year",
            stepmode = "backward"),
          list(
            count = 1,
            label = "YTD",
            step = "year",
            stepmode = "todate"),
          list(step = "all"))),
      rangeslider = list(type = "date")),
    yaxis = list(
      title = "Price (USD)",
      tickfont = list(color = "Blue"))
)
fig
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

## Display Summary Figures

### 2014-2020 Summary

# Per-column summary statistics over every row of btc
summary(btc)
##       Date                 Open              High              Low          
##  Min.   :2014-12-01   Min.   :  120.0   Min.   :  184.0   Min.   :    0.06  
##  1st Qu.:2016-05-26   1st Qu.:  468.1   1st Qu.:  482.6   1st Qu.:  459.44  
##  Median :2017-11-19   Median : 3886.6   Median : 3989.5   Median : 3777.14  
##  Mean   :2017-11-22   Mean   : 4770.1   Mean   : 4906.5   Mean   : 4617.20  
##  3rd Qu.:2019-05-27   3rd Qu.: 8282.1   3rd Qu.: 8534.0   3rd Qu.: 8082.48  
##  Max.   :2020-11-20   Max.   :19650.0   Max.   :19892.0   Max.   :19010.00  
##      Close           Volume.BTC       Volume.USD       
##  Min.   :  120.0   Min.   :     0   Min.   :0.000e+00  
##  1st Qu.:  470.5   1st Qu.:  5437   1st Qu.:2.873e+06  
##  Median : 3888.0   Median :  8285   Median :3.278e+07  
##  Mean   : 4778.2   Mean   : 11120   Mean   :6.812e+07  
##  3rd Qu.: 8299.5   3rd Qu.: 13771   3rd Qu.:9.201e+07  
##  Max.   :19650.0   Max.   :160541   Max.   :1.238e+09

2014

# Summary statistics for the rows dated 2014-12-01 through 2014-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2014-12-01"), Date <= as.Date("2014-12-31")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2014-12-01   Min.   :300.0   Min.   :340.0   Min.   :300.0  
##  1st Qu.:2014-12-08   1st Qu.:340.0   1st Qu.:340.0   1st Qu.:340.0  
##  Median :2014-12-16   Median :350.0   Median :350.0   Median :350.0  
##  Mean   :2014-12-16   Mean   :352.5   Mean   :356.4   Mean   :351.2  
##  3rd Qu.:2014-12-23   3rd Qu.:372.5   3rd Qu.:377.6   3rd Qu.:365.2  
##  Max.   :2014-12-31   Max.   :378.0   Max.   :398.0   Max.   :378.0  
##      Close         Volume.BTC        Volume.USD     
##  Min.   :340.0   Min.   : 0.0000   Min.   :   0.00  
##  1st Qu.:340.0   1st Qu.: 0.0000   1st Qu.:   0.00  
##  Median :350.0   Median : 0.0000   Median :   0.00  
##  Mean   :353.8   Mean   : 0.6788   Mean   : 251.83  
##  3rd Qu.:372.5   3rd Qu.: 0.0125   3rd Qu.:   4.72  
##  Max.   :378.0   Max.   :15.0100   Max.   :5675.07

2015

# Summary statistics for the rows dated 2015-01-01 through 2015-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2015-01-01"), Date <= as.Date("2015-12-31")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2015-01-01   Min.   :120.0   Min.   :184.0   Min.   :109.9  
##  1st Qu.:2015-04-02   1st Qu.:235.5   1st Qu.:238.3   1st Qu.:231.8  
##  Median :2015-07-02   Median :249.8   Median :255.8   Median :244.9  
##  Mean   :2015-07-02   Mean   :273.8   Mean   :280.1   Mean   :267.3  
##  3rd Qu.:2015-10-01   3rd Qu.:289.0   3rd Qu.:294.1   3rd Qu.:284.5  
##  Max.   :2015-12-31   Max.   :463.6   Max.   :500.0   Max.   :453.3  
##      Close         Volume.BTC       Volume.USD      
##  Min.   :120.0   Min.   :     0   Min.   :       0  
##  1st Qu.:235.5   1st Qu.:  5148   1st Qu.: 1308791  
##  Median :249.8   Median :  6706   Median : 1690436  
##  Mean   :274.0   Mean   :  7968   Mean   : 2340055  
##  3rd Qu.:289.0   3rd Qu.:  8919   3rd Qu.: 2436511  
##  Max.   :463.6   Max.   :160541   Max.   :71246837

2016

# Summary statistics for the rows dated 2016-01-01 through 2016-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2016-01-01"), Date <= as.Date("2016-12-31")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2016-01-01   Min.   :367.0   Min.   :373.0   Min.   :350.9  
##  1st Qu.:2016-04-01   1st Qu.:432.6   1st Qu.:435.8   1st Qu.:427.6  
##  Median :2016-07-01   Median :583.2   Median :590.5   Median :574.6  
##  Mean   :2016-07-01   Mean   :567.1   Mean   :575.1   Mean   :557.8  
##  3rd Qu.:2016-09-30   3rd Qu.:667.4   3rd Qu.:675.4   3rd Qu.:651.0  
##  Max.   :2016-12-31   Max.   :981.9   Max.   :988.9   Max.   :950.5  
##      Close         Volume.BTC      Volume.USD      
##  Min.   :367.0   Min.   :    0   Min.   :       0  
##  1st Qu.:432.7   1st Qu.: 3973   1st Qu.: 2268601  
##  Median :584.1   Median : 5108   Median : 2734549  
##  Mean   :568.5   Mean   : 6012   Mean   : 3356748  
##  3rd Qu.:667.9   3rd Qu.: 6885   3rd Qu.: 3533321  
##  Max.   :981.9   Max.   :27320   Max.   :17039092

2017

# Summary statistics for the rows dated 2017-01-01 through 2017-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2017-01-01"), Date <= as.Date("2017-12-31")) %>%
  summary()
##       Date                 Open              High            Low          
##  Min.   :2017-01-01   Min.   :  801.5   Min.   :  833   Min.   :    0.06  
##  1st Qu.:2017-04-02   1st Qu.: 1195.8   1st Qu.: 1221   1st Qu.: 1171.70  
##  Median :2017-07-02   Median : 2564.0   Median : 2670   Median : 2472.00  
##  Mean   :2017-07-02   Mean   : 3969.6   Mean   : 4153   Mean   : 3777.41  
##  3rd Qu.:2017-10-01   3rd Qu.: 4582.0   3rd Qu.: 4657   3rd Qu.: 4420.01  
##  Max.   :2017-12-31   Max.   :19650.0   Max.   :19892   Max.   :19010.00  
##      Close           Volume.BTC      Volume.USD       
##  Min.   :  801.5   Min.   : 1314   Min.   :1.332e+06  
##  1st Qu.: 1197.3   1st Qu.: 7783   1st Qu.:1.113e+07  
##  Median : 2574.8   Median :12218   Median :3.357e+07  
##  Mean   : 4004.9   Mean   :14866   Mean   :8.326e+07  
##  3rd Qu.: 4599.0   3rd Qu.:18952   3rd Qu.:7.349e+07  
##  Max.   :19650.0   Max.   :90954   Max.   :1.238e+09

2018

# Summary statistics for the rows dated 2018-01-01 through 2018-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2018-01-01"), Date <= as.Date("2018-12-31")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2018-01-01   Min.   : 3183   Min.   : 3229   Min.   : 3130  
##  1st Qu.:2018-04-02   1st Qu.: 6350   1st Qu.: 6450   1st Qu.: 6252  
##  Median :2018-07-02   Median : 6905   Median : 7100   Median : 6668  
##  Mean   :2018-07-02   Mean   : 7548   Mean   : 7759   Mean   : 7263  
##  3rd Qu.:2018-10-01   3rd Qu.: 8670   3rd Qu.: 8933   3rd Qu.: 8280  
##  Max.   :2018-12-31   Max.   :17099   Max.   :17178   Max.   :16251  
##      Close         Volume.BTC      Volume.USD       
##  Min.   : 3183   Min.   :    0   Min.   :        0  
##  1st Qu.: 6350   1st Qu.: 6092   1st Qu.: 39534415  
##  Median : 6894   Median :10000   Median : 68458545  
##  Mean   : 7519   Mean   :12659   Mean   : 98755346  
##  3rd Qu.: 8589   3rd Qu.:16138   3rd Qu.:128945499  
##  Max.   :17099   Max.   :85899   Max.   :658276609

2019

# Summary statistics for the rows dated 2019-01-01 through 2019-12-31.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2019-01-01"), Date <= as.Date("2019-12-31")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2019-01-01   Min.   : 3359   Min.   : 3383   Min.   : 3338  
##  1st Qu.:2019-04-11   1st Qu.: 5078   1st Qu.: 5221   1st Qu.: 5014  
##  Median :2019-07-08   Median : 7952   Median : 8118   Median : 7621  
##  Mean   :2019-07-05   Mean   : 7469   Mean   : 7673   Mean   : 7252  
##  3rd Qu.:2019-10-04   3rd Qu.: 9589   3rd Qu.: 9919   3rd Qu.: 9350  
##  Max.   :2019-12-31   Max.   :12927   Max.   :13868   Max.   :12122  
##      Close         Volume.BTC        Volume.USD       
##  Min.   : 3359   Min.   :  361.7   Min.   :  1438461  
##  1st Qu.: 5135   1st Qu.: 6547.9   1st Qu.: 37406908  
##  Median : 7952   Median : 8951.9   Median : 65132772  
##  Mean   : 7479   Mean   :12267.1   Mean   :100360898  
##  3rd Qu.: 9589   3rd Qu.:14751.6   3rd Qu.:123476350  
##  Max.   :12927   Max.   :70145.6   Max.   :887887493

2020

# Summary statistics for the rows dated 2020-01-01 through 2020-11-20.
# summary() takes the filtered frame from the pipe; naming `btc` again would
# bind the whole data frame to summary.data.frame()'s `maxsum` argument.
btc %>%
  filter(Date >= as.Date("2020-01-01"), Date <= as.Date("2020-11-20")) %>%
  summary()
##       Date                 Open            High            Low       
##  Min.   :2020-01-01   Min.   : 4857   Min.   : 5350   Min.   : 3858  
##  1st Qu.:2020-03-22   1st Qu.: 8730   1st Qu.: 8973   1st Qu.: 8521  
##  Median :2020-06-11   Median : 9522   Median : 9686   Median : 9281  
##  Mean   :2020-06-11   Mean   : 9823   Mean   :10052   Mean   : 9590  
##  3rd Qu.:2020-08-31   3rd Qu.:10944   3rd Qu.:11180   3rd Qu.:10736  
##  Max.   :2020-11-20   Max.   :17822   Max.   :18488   Max.   :17765  
##      Close         Volume.BTC       Volume.USD       
##  Min.   : 4857   Min.   :  2305   Min.   : 21216023  
##  1st Qu.: 8758   1st Qu.:  7923   1st Qu.: 76254157  
##  Median : 9526   Median : 11301   Median :106099896  
##  Mean   : 9856   Mean   : 14291   Mean   :135004002  
##  3rd Qu.:10960   3rd Qu.: 16634   3rd Qu.:163703447  
##  Max.   :18143   Max.   :117495   Max.   :608984267

Display Scatter Plots & Find Correlations

# Columns 2 through 7 (everything after Date) feed the correlation chart
btc_cor <- btc[2:7]
chart.Correlation(R = btc_cor)

In the above plot:

  • The distribution of each variable is shown on the diagonal.
  • On the bottom of the diagonal : the bivariate scatter plots with a fitted line are displayed
  • On the top of the diagonal : the value of the correlation plus the significance level as stars
  • Each significance level is associated with a symbol: p-values (0, 0.001, 0.01, 0.05, 0.1, 1) <=> symbols (“***”, “**”, “*”, “.”, “ ”)

These graphs show us that there is very little difference between the ‘Open’, ‘High’, ‘Low’, and ‘Close’ figures, so we are going to remove all but the ‘Close’ variable and retest.

# Drop Open, High and Low so that Close is the only price column left
btc <- subset(btc, select = c(-Open, -High, -Low))
# From here on the Close column is called Price
names(btc) <- replace(names(btc), names(btc) == "Close", "Price")
# Confirm the new layout
head(btc)
##            Date Price Volume.BTC Volume.USD
## 2170 2014-12-01 370.0    0.05656      19.53
## 2169 2014-12-02 378.0   15.01000    5675.07
## 2168 2014-12-03 378.0    0.54660     206.52
## 2167 2014-12-04 377.1    0.01000       3.77
## 2166 2014-12-05 377.1    0.00000       0.00
## 2165 2014-12-06 378.0    0.01500       5.67
# Re-run the correlation matrix & scatter plots on columns 2 through 4
btc_cor <- btc[2:4]
chart.Correlation(R = btc_cor)

- The strongest linear relationship is between Volume.BTC and Volume.USD, as shown by the straight diagonal line and correlation of 0.77.
- The correlation between Price and Volume.USD is also high at 0.7, but the correlation between Price and Volume.BTC is just inside the ideal threshold (between -0.4 and 0.4) at 0.35. Since Volume.BTC is the only x-variable with a correlation inside the ideal threshold, and that correlation is with the y-variable we’re trying to predict (Bitcoin’s price), further evaluation with additional data sets will be needed before running a linear regression or more advanced data science methods for predicting the price of Bitcoin.
- All correlations have a p-value of ~0 (as represented by the ***), which means they are statistically significant and unlikely to be caused by chance.

Bitcoin to Google Search Interest

Add Google Search Data

# Sourced from https://trends.google.com/trends/explore?date=2014-12-01%202020-11-20&geo=US&q=Bitcoin
# Load the search-interest CSV; its first line holds the column names
search <- read.csv(file = "btc_search_volume.csv", header = TRUE)
# Parse the "%Y-%m-%d" date values into Date objects
search[["Date"]] <- as.Date(search[["Date"]], format = "%Y-%m-%d")
# Preview the top six rows to confirm the file parsed as expected
head(search, n = 6L)
##         Date Volume
## 1 2014-12-01      3
## 2 2015-01-01      3
## 3 2015-02-01      3
## 4 2015-03-01      2
## 5 2015-04-01      2
## 6 2015-05-01      2

Display Google Search Volume Summary

# Per-column summary statistics for the search-interest table
summary(search)
##       Date                Volume      
##  Min.   :2014-12-01   Min.   :  2.00  
##  1st Qu.:2016-05-24   1st Qu.:  3.00  
##  Median :2017-11-16   Median :  8.00  
##  Mean   :2017-11-15   Mean   : 10.08  
##  3rd Qu.:2019-05-08   3rd Qu.: 11.00  
##  Max.   :2020-11-01   Max.   :100.00

Format BTC Database

# Create a new data frame that keeps only the first row of each month (the search volume data is only available monthly)
# Create startpoints function
# Positions at which each period starts.
# Takes the period end positions returned by endpoints(), moves each one
# forward by a single position, and discards the final shifted value.
#   x  : object forwarded to endpoints()
#   on : period name forwarded to endpoints() (default "months")
#   k  : period multiple forwarded to endpoints() (default 1)
# Returns the shifted positions as a numeric vector.
startpoints <- function (x, on = "months", k = 1) {
  period_ends <- endpoints(x, on = on, k = k)
  shifted <- period_ends + 1
  shifted[-length(shifted)]
}
# Keep only the rows of btc sitting at the monthly start positions
btc_m <- btc[startpoints(btc$Date, on = "months"), ]
head(btc_m)
##            Date  Price Volume.BTC Volume.USD
## 2170 2014-12-01 370.00    0.05656      19.53
## 2139 2015-01-01 340.00    0.00000       0.00
## 2108 2015-02-01 227.00 7401.43000 1640635.74
## 2080 2015-03-01 258.78 4967.35000 1255486.27
## 2049 2015-04-01 247.25 9529.31000 2333725.21
## 2019 2015-05-01 234.01 9697.40000 2287292.43

Combine BTC Month and Google Search Data

# The search volume is attached purely by row position, so fail loudly if the
# two tables do not cover the same months in the same order.
stopifnot(
  nrow(btc_m) == nrow(search),
  identical(format(btc_m$Date, "%Y-%m"), format(search$Date, "%Y-%m"))
)
# Append the monthly search volume as a new Volume.Search column
btc_search <- data.frame(btc_m, Volume.Search = search$Volume)
# Check
head(btc_search)
##            Date  Price Volume.BTC Volume.USD Volume.Search
## 2170 2014-12-01 370.00    0.05656      19.53             3
## 2139 2015-01-01 340.00    0.00000       0.00             3
## 2108 2015-02-01 227.00 7401.43000 1640635.74             3
## 2080 2015-03-01 258.78 4967.35000 1255486.27             2
## 2049 2015-04-01 247.25 9529.31000 2333725.21             2
## 2019 2015-05-01 234.01 9697.40000 2287292.43             2

Create Dual Y-Axis Comparable Graph

# Plotting frame with explicitly named columns, so the traces below can refer
# to them through `ds` instead of reaching back into the global `btc_search`.
ds <- data.frame(
  Date = btc_search$Date,
  Price = btc_search$Price,
  Volume.Search = btc_search$Volume.Search
)

# Secondary axis for search volume: drawn on the right and overlaid on the
# primary price axis ("y").
ay <- list(
  tickfont = list(color = "Orange"),
  overlaying = "y",
  side = "right",
  title = "Weighted Search Volume"
)
# Both traces inherit the Date x-axis declared here
fig <- plot_ly(ds, x = ~Date)
fig <- fig %>% add_lines(y = ~Price, name = "Bitcoin")
# yaxis = "y2" attaches the search trace to the secondary axis defined in `ay`
fig <- fig %>% add_lines(y = ~Volume.Search, name = "Google Search", yaxis = "y2")
fig <- fig %>% layout(
    title = "Bitcoin Price vs. Google Search Volume", 
    yaxis2 = ay,
    xaxis = list(
      # Quick-zoom buttons plus a range slider beneath the chart
      rangeselector = list(
        buttons = list(
          list(
            count = 3,
            label = "3 mo",
            step = "month",
            stepmode = "backward"),
          list(
            count = 6,
            label = "6 mo",
            step = "month",
            stepmode = "backward"),
          list(
            count = 1,
            label = "1 yr",
            step = "year",
            stepmode = "backward"),
          list(
            count = 1,
            label = "YTD",
            step = "year",
            stepmode = "todate"),
          list(step = "all"))),
      rangeslider = list(type = "date")),
    yaxis = list(title = "Price (USD)")
  )
fig

## Display Scatter Plots & Find Correlations

# Columns 2 through 5 (everything after Date) feed the correlation chart
btc_search_cor <- btc_search[2:5]
chart.Correlation(R = btc_search_cor)

All correlations are above the 0.4 threshold, so I am going to remove the variable with the highest correlation (Volume.USD) and run it again.

# Same chart using columns 2, 3 and 5 only (column 4 is left out)
btc_search_cor <- btc_search[c(2, 3, 5)]
chart.Correlation(R = btc_search_cor)

The correlation for Volume.Search is still above the 0.4 threshold; removing it as well would bring us back to the original correlation matrix above (btc variables only). We can infer that further evaluation is needed to predict the price of Bitcoin.

Compare Cryptocurrencies

For simplicity, I am representing the price with the ‘Close’ variable and eliminating ‘Open’, ‘High’, and ‘Low’ for all cryptocurrencies and future financial x-variables.

Format Coin Data

Bitcoin Data

# Prefix the Price column with the ticker: Price -> BTC.Price
names(btc) <- replace(names(btc), names(btc) == "Price", "BTC.Price")
# Prefix the USD volume column with the ticker: Volume.USD -> BTC.Volume.USD
names(btc) <- replace(names(btc), names(btc) == "Volume.USD", "BTC.Volume.USD")
head(btc)
##            Date BTC.Price Volume.BTC BTC.Volume.USD
## 2170 2014-12-01     370.0    0.05656          19.53
## 2169 2014-12-02     378.0   15.01000        5675.07
## 2168 2014-12-03     378.0    0.54660         206.52
## 2167 2014-12-04     377.1    0.01000           3.77
## 2166 2014-12-05     377.1    0.00000           0.00
## 2165 2014-12-06     378.0    0.01500           5.67

Ethereum Data

# Load the ETH/USD history file; the first line of the CSV holds the column names
eth_data <- read.csv(file = "Coinbase_ETHUSD_d.csv", header = TRUE)
# Sort ascending by Date, then drop the timestamp, symbol and Open/High/Low columns
eth <- subset(
  eth_data[order(eth_data$Date), ],
  select = -c(Unix.Timestamp, Symbol, Open, High, Low)
)
# Parse the "%Y-%m-%d" date values into Date objects
eth[["Date"]] <- as.Date(eth[["Date"]], format = "%Y-%m-%d")
# Prefix the price and USD-volume columns with the ticker
names(eth) <- replace(names(eth), names(eth) == "Close", "ETH.Price")
names(eth) <- replace(names(eth), names(eth) == "Volume.USD", "ETH.Volume.USD")
# Preview the top six rows to confirm the result
head(eth, n = 6L)
##            Date ETH.Price Volume.ETH ETH.Volume.USD
## 1627 2016-05-27     11.25   13749.03      151147.98
## 1626 2016-05-28     11.93   15810.34      180822.02
## 1625 2016-05-29     12.34    3481.30       42228.37
## 1624 2016-05-30     12.41    4110.59       51655.95
## 1623 2016-05-31     14.00    5654.91       76994.75
## 1622 2016-06-01     13.93   10369.02      145746.12

Litecoin Data

# Load the LTC/USD history file; the first line of the CSV holds the column names
ltc_data <- read.csv(file = "Coinbase_LTCUSD_d.csv", header = TRUE)
# Sort ascending by Date, then drop the timestamp, symbol and Open/High/Low columns
ltc <- subset(
  ltc_data[order(ltc_data$Date), ],
  select = -c(Unix.Timestamp, Symbol, Open, High, Low)
)
# Parse the "%Y-%m-%d" date values into Date objects
ltc[["Date"]] <- as.Date(ltc[["Date"]], format = "%Y-%m-%d")
# Prefix the price and USD-volume columns with the ticker
names(ltc) <- replace(names(ltc), names(ltc) == "Close", "LTC.Price")
names(ltc) <- replace(names(ltc), names(ltc) == "Volume.USD", "LTC.Volume.USD")
# Preview the top six rows to confirm the result
head(ltc, n = 6L)
##            Date LTC.Price Volume.LTC LTC.Volume.USD
## 1539 2016-08-23      3.95     443.86        1737.31
## 1538 2016-08-24      3.84    5083.21       19247.53
## 1537 2016-08-25      3.81    5085.64       19276.60
## 1536 2016-08-26      3.81    3336.89       12746.27
## 1535 2016-08-27      3.78    1140.72        4295.72
## 1534 2016-08-28      3.72    1904.98        7111.87

Ripple Data

# Load the XRP/USD history file; the first line of the CSV holds the column names
xrp_data <- read.csv(file = "Bitstamp_XRPUSD_d.csv", header = TRUE)
# Sort ascending by Date, then drop the timestamp, symbol and Open/High/Low columns
xrp <- subset(
  xrp_data[order(xrp_data$Date), ],
  select = -c(Unix.Timestamp, Symbol, Open, High, Low)
)
# Parse the "%Y-%m-%d" date values into Date objects
xrp[["Date"]] <- as.Date(xrp[["Date"]], format = "%Y-%m-%d")
# Prefix the price and USD-volume columns with the ticker
names(xrp) <- replace(names(xrp), names(xrp) == "Close", "XRP.Price")
names(xrp) <- replace(names(xrp), names(xrp) == "Volume.USD", "XRP.Volume.USD")
# Preview the top six rows to confirm the result
head(xrp, n = 6L)
##            Date XRP.Price Volume.XRP XRP.Volume.USD
## 1392 2017-01-17   0.00683  4509598.6       30673.69
## 1391 2017-01-18   0.00680  5535202.5       38018.93
## 1390 2017-01-19   0.00684  2951225.7       19882.33
## 1389 2017-01-20   0.00660  1713629.9       11374.15
## 1388 2017-01-21   0.00684  2086817.3       13955.92
## 1387 2017-01-22   0.00678   573857.3        3878.76

Monero Data

# Load the XMR/USD history file; the first line of the CSV holds the column names
xmr_data <- read.csv(file = "Poloniex_XMRUSD_d.csv", header = TRUE)
# Sort ascending by Date, then drop the timestamp, symbol and Open/High/Low columns
xmr <- subset(
  xmr_data[order(xmr_data$Date), ],
  select = -c(Unix.Timestamp, Symbol, Open, High, Low)
)
# Parse the "%Y-%m-%d" date values into Date objects
xmr[["Date"]] <- as.Date(xmr[["Date"]], format = "%Y-%m-%d")
# Prefix the price and USD-volume columns with the ticker
names(xmr) <- replace(names(xmr), names(xmr) == "Close", "XMR.Price")
names(xmr) <- replace(names(xmr), names(xmr) == "Volume.USD", "XMR.Volume.USD")
# Preview the top six rows to confirm the result
head(xmr, n = 6L)
##            Date XMR.Price Volume.XMR XMR.Volume.USD
## 2110 2015-01-29    0.3107      68.21          23.86
## 2109 2015-01-30    0.3027     230.00          80.20
## 2108 2015-01-31    0.2896      82.10          24.55
## 2107 2015-02-01    0.2896       0.00           0.00
## 2106 2015-02-02    0.3029      59.58          17.89
## 2105 2015-02-03    0.3311     254.39          90.77

Dash Data

# Load the DASH/USD history file; the first line of the CSV holds the column names
dash_data <- read.csv(file = "Poloniex_DASHUSD_d.csv", header = TRUE)
# Sort ascending by Date, then drop the timestamp, symbol and Open/High/Low columns
dash <- subset(
  dash_data[order(dash_data$Date), ],
  select = -c(Unix.Timestamp, Symbol, Open, High, Low)
)
# Parse the "%Y-%m-%d" date values into Date objects
dash[["Date"]] <- as.Date(dash[["Date"]], format = "%Y-%m-%d")
# Prefix the price column with the ticker
names(dash) <- replace(names(dash), names(dash) == "Close", "DASH.Price")
# This file's volume headers arrive as Volume.DAS / Volume.HUSD; give them the
# same naming pattern as the other coins
names(dash) <- replace(names(dash), names(dash) == "Volume.DAS", "Volume.DASH")
names(dash) <- replace(names(dash), names(dash) == "Volume.HUSD", "DASH.Volume.USD")
# Preview the top six rows to confirm the result
head(dash, n = 6L)
##            Date DASH.Price Volume.DASH DASH.Volume.USD
## 2091 2015-02-17         10    0.001992         0.01992
## 2090 2015-02-18         10    0.000000         0.00000
## 2089 2015-02-19         10    0.000000         0.00000
## 2088 2015-02-20         10    0.000000         0.00000
## 2087 2015-02-21         10    0.000000         0.00000
## 2086 2015-02-22         10    0.000000         0.00000

Filter Rows for Consistency

# Common row count for every coin, taken from the XRP table because it has the
# fewest historical data points. Derived from the data rather than hard-coded
# so it stays correct when the CSV is refreshed.
c_rows <- nrow(xrp)
# Bitcoin: keep the last c_rows rows in a separate object (btc itself is left intact)
btc_c <- tail(btc, n = c_rows)
head(btc_c)
##            Date BTC.Price Volume.BTC BTC.Volume.USD
## 1392 2017-01-17    907.58    7258.23        6401902
## 1391 2017-01-18    880.00    6654.70        5878776
## 1390 2017-01-19    901.00    5127.03        4602695
## 1389 2017-01-20    894.05    3073.67        2761267
## 1388 2017-01-21    925.36    4545.67        4173036
## 1387 2017-01-22    924.98    4915.78        4556300
# Ethereum: keep only the last c_rows rows
eth <- eth %>% tail(n = c_rows)
head(eth, n = 6L)
##            Date ETH.Price Volume.ETH ETH.Volume.USD
## 1392 2017-01-17     10.20   69235.88       705202.8
## 1391 2017-01-18     10.23   62085.98       635612.0
## 1390 2017-01-19     10.47   46097.26       477203.6
## 1389 2017-01-20     10.65   60007.76       637465.4
## 1388 2017-01-21     10.99   58646.70       635251.4
## 1387 2017-01-22     10.78   48319.33       525789.5
# Litecoin: keep only the last c_rows rows
ltc <- ltc %>% tail(n = c_rows)
head(ltc, n = 6L)
##            Date LTC.Price Volume.LTC LTC.Volume.USD
## 1392 2017-01-17      3.93    1774.48        7050.37
## 1391 2017-01-18      3.89    2195.30        8536.72
## 1390 2017-01-19      3.88    3095.56       12060.09
## 1389 2017-01-20      3.88     893.27        3490.55
## 1388 2017-01-21      3.92     620.48        2425.73
## 1387 2017-01-22      3.86    1711.05        6664.88
# Ripple: keep only the last c_rows rows
xrp <- xrp %>% tail(n = c_rows)
head(xrp, n = 6L)
##            Date XRP.Price Volume.XRP XRP.Volume.USD
## 1392 2017-01-17   0.00683  4509598.6       30673.69
## 1391 2017-01-18   0.00680  5535202.5       38018.93
## 1390 2017-01-19   0.00684  2951225.7       19882.33
## 1389 2017-01-20   0.00660  1713629.9       11374.15
## 1388 2017-01-21   0.00684  2086817.3       13955.92
## 1387 2017-01-22   0.00678   573857.3        3878.76
# Monero: keep only the last c_rows rows
# NOTE(review): the rendered output of this step starts on 2017-01-16, while
# the BTC/ETH/LTC/XRP outputs start on 2017-01-17 -- trimming by row count does
# not align the date windows. Confirm whether a date-based filter is intended.
xmr <- xmr %>% tail(n = c_rows)
head(xmr, n = 6L)
##            Date XMR.Price Volume.XMR XMR.Volume.USD
## 1392 2017-01-16     10.51   11549.89      124038.82
## 1391 2017-01-17     12.33   14986.29      172577.12
## 1390 2017-01-18     12.00   15813.59      192223.22
## 1389 2017-01-19     12.15    6526.12       78925.65
## 1388 2017-01-20     11.88    5532.43       66342.50
## 1387 2017-01-21     12.09    6985.26       84206.72
# Dash: keep only the last c_rows rows
# NOTE(review): the rendered output of this step starts on 2017-01-16, while
# the BTC/ETH/LTC/XRP outputs start on 2017-01-17 -- trimming by row count does
# not align the date windows. Confirm whether a date-based filter is intended.
dash <- dash %>% tail(n = c_rows)
head(dash, n = 6L)
##            Date DASH.Price Volume.DASH DASH.Volume.USD
## 1392 2017-01-16      12.82      417.30         5253.05
## 1391 2017-01-17      14.33     2220.10        30577.09
## 1390 2017-01-18      14.00     2455.66        34117.04
## 1389 2017-01-19      15.16     2766.26        40779.23
## 1388 2017-01-20      15.25     2852.34        43349.21
## 1387 2017-01-21      14.95      721.51        10816.23
# Check row count (find formula that'll return the number of rows for each)

Normalize the Data

Compare With Traditional Investment Vehicles (Equities & Bonds)

(represented by the Dow Jones and US Treasury bonds)

Evaluate Bitcoin Price Compared to the Top 5 Currencies

#plot_top_5_currencies()

Display Large Scatter Plots

# Map the correlation between Bitcoin's closing price and volume traded (in USD)
#plot(btc$Price, btc$Volume.USD, main = 'Price to Volume (USD) Comparison', xlab = 'Closing Price', ylab = 'Volume (USD)')
# Map the correlation between Bitcoin's closing price and volume traded (in BTC)
#plot(btc$Price, btc$Volume.BTC, main = 'Price to Volume (BTC) Comparison', xlab = 'Closing Price', ylab = 'Volume (BTC)')